import pymysql
import pandas as pd
import re

# with open("part-r-000001", 'r', encoding="utf-8") as f:
#     line = f.readline()
#     con = pymysql.connect('localhost', 'root', '123', 'flask_demo')
#     cursor = con.cursor()
#     i = 0
#     while line:
#         line = f.readline()
#         i = i + 1
#         split = line.split("|")
#         if len(split) !=3:
#             print(i, line)
#             break
#         sql = 'insert into shouji values ("{}","{}","{}")'.format(split[0], split[1], split[2])
#         cursor.execute(sql)
#     con.commit()


def _strip_spaces(value):
    """Return ``value`` as text with every ASCII space character removed."""
    return str(value).replace(' ', '')


def _normalize_brand(value):
    """Clean one ``pz`` entry.

    Steps, applied in order:
    1. drop any text enclosed in full-width parentheses,
    2. rewrite the Latin spellings of Huawei to the Chinese name,
    3. replace each listed minor-brand token (and the literal ``None``)
       with the catch-all label.

    Only the matched substring is replaced; surrounding text is kept.
    """
    text = re.sub(r"（.*?）", '', str(value))
    text = re.sub(r"huawei|HUAWEI", "华为", text)
    return re.sub(r"None|HTC|守护宝|摩托罗拉|小辣椒|天语|美图|酷派|中兴|一加|努比亚|黑鲨|联想", '其他', text)


# Load the pipe-delimited records; the three column names are supplied here
# rather than read from the file.
df = pd.read_csv('part-r-00000', sep='|', names='pz,ys,cc'.split(','))
print(df.columns)

# Discard every row that has a missing value in any column.
df = df.dropna()

# Raise the row display limit so the grouped summary below is printed in full.
pd.set_option('display.max_row', 5000)

# Remove spaces from all three columns, then normalize the ``pz`` values.
for _col in ('pz', 'ys', 'cc'):
    df[_col] = df[_col].apply(_strip_spaces)
df['pz'] = df['pz'].apply(_normalize_brand)

# Row counts per (pz, ys) pair, followed by the overall frame dimensions.
print(df.groupby(['pz', 'ys']).count())
print(df.shape)
# Write every row of ``df`` into the ``shouji`` table of the local
# ``flask_demo`` database.
#
# Connection parameters are passed by keyword: PyMySQL 1.0+ no longer accepts
# them positionally, and the keyword form also works on older releases.
con = pymysql.connect(host='localhost', user='root', password='123', database='flask_demo')
try:
    with con.cursor() as cursor:
        # Use driver-side placeholders instead of string formatting so that
        # values containing quotes or backslashes are escaped correctly and
        # cannot alter the SQL statement.
        cursor.executemany(
            'insert into shouji values (%s, %s, %s)',
            # tolist() yields plain Python values that the driver can escape.
            df.values.tolist(),
        )
    # Commit once after all rows are queued so the load is all-or-nothing.
    con.commit()
finally:
    # Always release the connection, even if an insert fails; closing
    # without a commit discards any uncommitted rows.
    con.close()
